#-*-coding:utf-8-*-
#!/usr/bin/python

from lib import config, req_sy, db_sy, encoding
from lib import email_sy as email
from lib.logger import logger
from bs4 import BeautifulSoup
import time
import re,sys
import gevent.monkey
gevent.monkey.patch_socket()
import gevent
from gevent.pool import Pool

# Value written to the 'source' field of every crawled record.
onlive_src = 'douyu'
# Shared gevent pool, sized by config.GPOOLSIZE, used to fetch room pages concurrently.
gpool = Pool(config.GPOOLSIZE)

# Work around garbled text when inserting into the database by forcing the
# process-wide default string encoding to UTF-8.
# NOTE(review): reload(sys) / sys.setdefaultencoding only exist on Python 2.
reload(sys)
sys.setdefaultencoding('utf-8')

# Handle a single douyu live room, e.g. http://www.douyu.com/huli666
# Player URL format: http://staticlive.douyutv.com/common/share/play.swf?room_id=321358
def _pro_onlivedata_from_douyu(zbid):
	"""Fetch one douyu room page and extract its thumbnail image URL.

	The page URL is built as ``config.DOUYU_VIEW % zbid`` and downloaded with
	``req_sy.get_html``.  The thumbnail is the ``src`` of the ``<img>`` inside
	the first element whose class is ``h_tx fl``.

	:param zbid: room identifier substituted into ``config.DOUYU_VIEW``.
	:return: ``(zbid, thumb)``; ``thumb`` is ``''`` when the page is empty or
		missing, the element is not found, or parsing fails.  Parsing
		failures are logged and reported by e-mail instead of being raised.
	"""
	url = config.DOUYU_VIEW % zbid
	#logger.info('%s | %s ' % (u'douyu', url))
	html = req_sy.get_html(url)
	# Treat a missing (None) response the same way as an empty page.
	if not html or html.strip() == '':
		return zbid, ''

	try:
		soup = BeautifulSoup(html, 'html.parser', from_encoding = 'utf-8')
		# Locate the element that wraps the thumbnail image.
		v_obj = soup.find(class_='h_tx fl')
		if v_obj:
			return zbid, v_obj.img['src']
	except Exception as e:
		reason = "%s : %s, zbid:%s" % (u'获取douyu直播地址', e, zbid)
		logger.error(reason)
		email.send_email(u'douyu直播爬取error', reason)

	return zbid, ''

def _get_page_views_asynchronous(ul_list):
	"""Fetch the room-page thumbnails for one list page concurrently.

	For every ``<li>`` under ``ul_list`` whose game tag (element with class
	``tag ellipsis``) is a key of ``config.DOUYU_GAME``, a greenlet running
	``_pro_onlivedata_from_douyu`` is spawned on the shared pool.  Entries
	without a game tag or without a link are skipped here.

	:param ul_list: parsed BeautifulSoup node containing the ``<li>`` entries.
	:return: dict mapping each spawned ``zbid`` to its thumbnail URL.  A room
		whose greenlet failed is logged and mapped to ``''`` so that callers
		can always look up every room they requested.
	"""
	jobs = []
	for item in ul_list.findAll('li'):
		tag = item.find(class_='tag ellipsis')
		link = item.a
		if tag is None or link is None:
			continue
		if tag.string not in config.DOUYU_GAME:
			continue
		href = link.get('href')
		if not href:
			continue
		# The href looks like "/<zbid>"; drop the leading slash.
		zbid = href[1:]
		jobs.append((zbid, gpool.spawn(_pro_onlivedata_from_douyu, zbid)))
	# Wait until every greenlet in the pool has finished.
	gpool.join()

	res = {}
	for zbid, job in jobs:
		if job.successful():
			res[job.value[0]] = job.value[1]
		else:
			# Keep the key present so one failed fetch does not break the caller.
			logger.error("%s | zbid:%s | %s" % ("douyu room page greenlet failed", zbid, job.exception))
			res[zbid] = ''
	return res

def _douyu_views_to_int(views):
	"""Convert a viewer-count label such as u'4417' or u'1.2万' to an int."""
	# u'万' means ten thousand; the number in front of it is scaled accordingly.
	pos = views.find(u'万')
	if pos > 0:
		return int(float(views[0:pos]) * 10000)
	return int(views)

def get_data_action(is_get_livedata = True):
	"""Crawl the douyu live-list pages and build one record per matching room.

	List pages are fetched from ``config.DOUYU_API % page`` starting at page 1
	(e.g. http://www.douyu.com/directory/columnRoom/game?page=1&isAjax=1).
	Crawling continues while the page number is below 200 and the page just
	processed contained a room of a configured game with more than 50 viewers.
	A page that cannot be downloaded is logged, reported by e-mail and skipped;
	a parsing error is logged, reported by e-mail and ends the crawl.

	:param is_get_livedata: when true, every matching room page is also
		fetched (concurrently) to fill ``zb_thumb``.  When false ``zb_thumb``
		is ``''`` and the crawl sleeps one second before each list page.
	:return: list of dicts with the keys ``zbid``, ``zb_thumb``, ``zbname``,
		``source``, ``views``, ``category``, ``inputtime``, ``title``,
		``thumb`` and ``livedata``.
	"""
	game_type = config.DOUYU_GAME
	# The room id is embedded in the thumbnail path: /<digits>/<digits>/<room id>_<digits>
	room_id_re = re.compile(r'\/\d+\/\d+\/(\d+)_\d+')
	res_data = []
	page = 1
	top_views = 100
	while page < 200 and top_views > 50:
		if not is_get_livedata:
			time.sleep(1)
		# Reset per page; raised below to the highest viewer count seen on it.
		top_views = 49
		douyu_api = config.DOUYU_API % page
		html = req_sy.get_html(douyu_api)
		# A missing (None) response is handled like an empty one.
		if html and html.strip() != '':
			soup = BeautifulSoup(html, 'html.parser', from_encoding = 'utf-8')
			# Optionally fetch every room page of this list page up front.
			livedatas = {}
			if is_get_livedata:
				livedatas = _get_page_views_asynchronous(soup)
			# Each <li> is read through these selectors: 'tag ellipsis' (game name),
			# 'dy-num fr' (viewers), 'dy-name ellipsis fl' (streamer name),
			# 'ellipsis' (title) and 'imgbox' (thumbnail image).
			for item in soup.findAll('li'):
				kw = {}
				try:
					gameHostName = item.find(class_='tag ellipsis').string
					if gameHostName not in game_type:
						continue
					views = _douyu_views_to_int(item.find(class_='dy-num fr').string)
					if top_views < views:
						top_views = views
					#logger.info(str(page) + ', views:'+ str(views) + ',top_views:'+ str(top_views) + ',' + gameHostName)
					kw['zbid'] = item.a['href'][1:]
					# A room whose page lookup failed (or was not requested) gets
					# an empty thumbnail instead of aborting the whole crawl.
					kw['zb_thumb'] = livedatas.get(kw['zbid'], '')
					kw['zbname'] = item.find(class_='dy-name ellipsis fl').string
					kw['source'] = onlive_src
					kw['views'] = views
					kw['category'] = game_type[gameHostName]
					kw['inputtime'] = int(time.time())
					kw['title'] = item.find(class_='ellipsis').string
					kw['thumb'] = item.find(class_='imgbox').img['data-original']
					matchObj = room_id_re.search(kw['thumb'])
					if not matchObj:
						# Without a room id no player URL can be built; skip the room.
						continue
					kw['livedata'] = "http://staticlive.douyutv.com/common/share/play.swf?room_id=%s" % matchObj.group(1)
				except Exception as e:
					reason = "%s : %s" % (u'解析douyu直播列表页出错', e)
					logger.error(reason)
					email.send_email(u'douyu直播爬取error', reason)
					# On error leave the item loop and force the page loop to stop.
					top_views = 1
					break
				res_data.append(kw)
		else:
			failure = "%s | %s" % ("get douyu API Failed", douyu_api)
			logger.error(failure)
			email.send_email(u'douyu直播API获取失败', failure)
			# Keep crawling: a failed download says nothing about viewer counts.
			top_views = 100
		page = page + 1

	return res_data

if __name__ == "__main__":
	# Script entry point: run one crawl without per-room page lookups and
	# report how long it took.
	now = time.time()
	dbconn = db_sy.getConnection()
	try:
		update_data = get_data_action(is_get_livedata = False)
		# Persisting the result is currently disabled:
		#db_sy.db_insert(dbconn, update_data)
	finally:
		# Release the connection even when the crawl raises.
		dbconn.close()
	# Parenthesised single argument prints the same text on Python 2.
	print("time cost : " + str(int((time.time() - now))) + " seconds")
